Exploratory Analysis

We will start with reading and imputing missing values using median imputation:

expr <- as.data.frame(t(read.delim("CLL_mRNA.txt", header = TRUE, sep="\t")))
for(i in 1:ncol(expr)){expr[,i][is.na(expr[,i])]<-median(expr[,i],na.rm=TRUE)}
expr[1:5,1:5]
##      ENSG00000244734 ENSG00000158528 ENSG00000198478 ENSG00000175445
## H045        4.558644       11.741854        8.921456       12.686458
## H109        2.721512       13.287432        2.721512       10.925985
## H024        9.938456        2.341006       12.381452        1.528848
## H056       13.278004        3.232874        8.106266        1.528848
## H079        6.086874       11.940820        4.889503       13.340588
##      ENSG00000174469
## H045        2.644946
## H109       12.648355
## H024        1.528848
## H056       13.565210
## H079        5.476914
mut <- as.data.frame(t(read.delim("CLL_Mutations.txt", header = TRUE, sep="\t")))
for(i in 1:ncol(mut)){mut[,i][is.na(mut[,i])]<-median(mut[,i],na.rm=TRUE)}
mut[1:5,1:5]
##      gain2p25.3 gain3q26 del6p21.2 del6q21 del8p12
## H045          0        0         0       0       0
## H109          0        0         0       0       0
## H024          0        0         0       0       0
## H056          0        0         0       0       0
## H079          1        0         0       0       0
meth <- as.data.frame(t(read.delim("CLL_Methylation.txt", header = TRUE, sep="\t")))
for(i in 1:ncol(meth)){meth[,i][is.na(meth[,i])]<-median(meth[,i],na.rm=TRUE)}
meth[1:5,1:5]
##       cg10146935 cg26837773 cg17801765 cg13244315 cg06181703
## H045  1.81108585 -5.1725723  5.4115263 -0.1188251  5.1203838
## H109 -3.99750846  1.5948702  5.4126925  1.0438706  1.2794803
## H024 -2.84431298  0.1611705  0.3657059 -4.2192362  0.7211004
## H056 -3.33865611 -2.0934326  0.3736342 -1.5921965  4.0470594
## H079 -0.01936203  3.7489796  5.4120096  1.4164183  5.2374225
drug <- as.data.frame(t(read.delim("CLL_Drugs.txt", header = TRUE, sep="\t")))
for(i in 1:ncol(drug)){drug[,i][is.na(drug[,i])]<-median(drug[,i],na.rm=TRUE)}
drug[1:5,1:5]
##         D_001_1    D_001_2   D_001_3   D_001_4   D_001_5
## H045 0.02363938 0.04623274 0.3187471 0.8237027 0.8962777
## H109 0.07359900 0.10623002 0.2732891 0.7171379 0.8850003
## H024 0.04426905 0.08425647 0.3187606 0.7797544 0.8974880
## H056 0.05813930 0.09022028 0.2322145 0.7225736 0.7957497
## H079 0.02042077 0.04750543 0.3638962 0.8073907 0.8794886

Now let us have a look at the phenotypic data, extract Geneder and convert it to a variable Y that is going to be used later when running PLS-DA analysis:

phen <- read.delim("CLL_Covariates.txt", header = TRUE, sep="\t")
head(phen)
##      Gender Diagnosis
## H045      m       CLL
## H109      m       CLL
## H024      m       CLL
## H056      m       CLL
## H079      m       CLL
## H164      f       CLL
Y<-factor(phen$Gender)
summary(Y)
##   f   m 
##  79 121

Let us split the data set into train and test sub-sets. We select 60 samples (30%) for testing. Since we have more males than females (fraction of females is approximately 0.4) we select 36 males and 24 females.

set.seed(1234)
female_sample<-rownames(expr)[as.character(phen$Gender)=="f"][sample(1:length(rownames(expr)[as.character(phen$Gender)=="f"]),24)]
female_sample
##  [1] "H102" "H040" "H165" "H018" "H101" "H096" "H033" "H210" "H225" "H170"
## [11] "H181" "H182" "H204" "H023" "H051" "H031" "H209" "H035" "H081" "H029"
## [21] "H014" "H186" "H166" "H167"
male_sample<-rownames(expr)[as.character(phen$Gender)=="m"][sample(1:length(rownames(expr)[as.character(phen$Gender)=="m"]),36)]
male_sample
##  [1] "H016" "H044" "H032" "H055" "H254" "H024" "H183" "H030" "H252" "H272"
## [11] "H231" "H107" "H094" "H109" "H260" "H078" "H080" "H208" "H072" "H020"
## [21] "H059" "H235" "H206" "H202" "H106" "H087" "H049" "H104" "H038" "H037"
## [31] "H250" "H191" "H017" "H176" "H070" "H178"
expr_test<-expr[match(c(female_sample,male_sample),rownames(expr)),]
expr_test[1:5,1:5]
##      ENSG00000244734 ENSG00000158528 ENSG00000198478 ENSG00000175445
## H102        7.320581       12.434167        6.783195       11.336763
## H040        4.769423        2.381844        2.381844        5.459589
## H165        2.529650        2.529650        4.073655       10.346617
## H018        4.184367        1.528848        8.195382        2.911501
## H101        7.049108       11.335080        5.101373        9.256292
##      ENSG00000174469
## H102        8.629901
## H040       10.952705
## H165        8.363989
## H018       12.530168
## H101        7.540800
meth_test<-meth[match(c(female_sample,male_sample),rownames(meth)),]
meth_test[1:5,1:5]
##       cg10146935  cg26837773 cg17801765 cg13244315  cg06181703
## H102  0.02408192 -2.66149768  4.7975506 -2.0793298 -0.09300816
## H040 -4.40691148 -5.22192499  5.3746764  3.2587619  3.01769727
## H165  0.59337171 -0.03963969  0.5020684  3.8726517  3.49487818
## H018 -3.67883824 -3.52009661  2.0600774 -1.0636796  0.37493793
## H101 -2.91823259  4.62748491  1.7367315  0.1692539  5.36798425
mut_test<-mut[match(c(female_sample,male_sample),rownames(mut)),]
mut_test[1:5,1:5]
##      gain2p25.3 gain3q26 del6p21.2 del6q21 del8p12
## H102          0        0         0       0       0
## H040          0        0         0       0       0
## H165          1        0         0       0       0
## H018          0        0         0       0       0
## H101          0        0         0       0       0
drug_test<-drug[match(c(female_sample,male_sample),rownames(drug)),]
drug_test[1:5,1:5]
##         D_001_1    D_001_2   D_001_3   D_001_4   D_001_5
## H102 0.08906634 0.13920632 0.2337110 0.7513949 0.7672126
## H040 0.01908389 0.02544977 0.4947984 0.9673132 1.0000992
## H165 0.03234895 0.04840093 0.2723969 0.7585817 0.9079429
## H018 0.04426905 0.08425647 0.3187606 0.7797544 0.8974880
## H101 0.07350703 0.12265018 0.2543330 0.7255191 0.7361780
Y.test<-Y[match(c(female_sample,male_sample),rownames(phen))]
Y.test
##  [1] f f f f f f f f f f f f f f f f f f f f f f f f m m m m m m m m m m m
## [36] m m m m m m m m m m m m m m m m m m m m m m m m m
## Levels: f m
summary(Y.test)
##  f  m 
## 24 36
length(Y.test)
## [1] 60
expr<-expr[!rownames(expr)%in%rownames(expr_test),]
meth<-meth[!rownames(meth)%in%rownames(meth_test),]
mut<-mut[!rownames(mut)%in%rownames(mut_test),]
drug<-drug[!rownames(drug)%in%rownames(drug_test),]

Y<-Y[!rownames(phen)%in%c(female_sample,male_sample)]
Y
##   [1] m m m f m m m m m m m m m f f f f m f m m m m m m f m f m m f m m m f
##  [36] f f m m m f m m f f m m m m f f m m f m f f f m f m m m m f f f m f m
##  [71] f m f m f f m f f m m f m f m m m f m m m f m f f m m m m f m m m m m
## [106] f m f m m f f m f f f m m f f f m m m f m m m f m f m f m m m m f m f
## Levels: f m
length(Y)
## [1] 140
summary(Y)
##  f  m 
## 55 85

Since mutations represent a binary data, there is always a lack of variation due to coding with 0 and 1. Therefore, we will prefilter the mutation matrix by excluding sites with variance across individuals close to zero:

library("mixOmics")
## Loading required package: MASS
## Loading required package: lattice
## Loading required package: ggplot2
## 
## Loaded mixOmics 6.3.1
## 
## Visit http://www.mixOmics.org for more details about our methods.
## Any bug reports or comments? Notify us at mixomics at math.univ-toulouse.fr or https://bitbucket.org/klecao/package-mixomics/issues
## 
## Thank you for using mixOmics!
library("matrixStats")
my_nearZeroVar<-nearZeroVar(mut)
head(my_nearZeroVar$Metrics)
##            freqRatio percentUnique
## gain2p25.3  27.00000      1.428571
## gain3q26    69.00000      1.428571
## del6p21.2   69.00000      1.428571
## del6q21     22.33333      1.428571
## del8p12     45.66667      1.428571
## gain8q24    27.00000      1.428571
dim(my_nearZeroVar$Metrics)
## [1] 59  2
mut <- mut[,-which(colnames(mut)%in%rownames(my_nearZeroVar$Metrics))]
mut[1:5,1:5]
##      del11q22.3 trisomy12 del13q14_any del17p13 BRAF
## H045          1         0            0        0    0
## H056          0         0            1        0    0
## H079          1         0            0        0    0
## H164          1         0            1        0    0
## H113          0         1            1        0    0
dim(mut)
## [1] 140  10
mut_test<-subset(mut_test,select=colnames(mut))

Therefore we end up with just a few mutations which have enough variation for the PLS-DA model. Later perhaps it makes sense to include all of them and not select informative ones using the sparse PLS-DA algorithm.

Feature Selection for OMICs Integration

Right now we can not integrate the OMICs data sets since at least expression and methylation data are high-dimensional so we need to perform a feature selection for those two OMICs. Here we use LASSO for doing feature selection for gene expression data:

library("glmnet")
## Loading required package: Matrix
## Loading required package: foreach
## Loaded glmnet 2.0-18
lasso_fit <- cv.glmnet(as.matrix(expr), Y, family = "binomial", alpha = 1)
plot(lasso_fit)

coef <- predict(lasso_fit, s = "lambda.min", type = "nonzero")
colnames(expr)[unlist(coef)]
##  [1] "ENSG00000198822" "ENSG00000176769" "ENSG00000150457"
##  [4] "ENSG00000184368" "ENSG00000133134" "ENSG00000235268"
##  [7] "ENSG00000001626" "ENSG00000083290" "ENSG00000172139"
## [10] "ENSG00000140403" "ENSG00000136205" "ENSG00000186871"
## [13] "ENSG00000106633" "ENSG00000147113" "ENSG00000204291"
## [16] "ENSG00000067798" "ENSG00000162551" "ENSG00000049246"
## [19] "ENSG00000196739" "ENSG00000088053" "ENSG00000149150"
## [22] "ENSG00000162627" "ENSG00000185666" "ENSG00000167701"
## [25] "ENSG00000187608" "ENSG00000169330" "ENSG00000106025"
## [28] "ENSG00000240403" "ENSG00000130508" "ENSG00000142515"
## [31] "ENSG00000213160" "ENSG00000078114"
result_expr <- data.frame(GENE = names(as.matrix(coef(lasso_fit, s = "lambda.min"))
                                [as.matrix(coef(lasso_fit, s = "lambda.min"))[,1]!=0, 1])[-1], 
                   SCORE = as.numeric(as.matrix(coef(lasso_fit, s = "lambda.min"))
                                      [as.matrix(coef(lasso_fit, 
                                                      s = "lambda.min"))[,1]!=0, 1])[-1])
result_expr <- result_expr[order(-abs(result_expr$SCORE)),]
print(head(result_expr,10))
##               GENE      SCORE
## 14 ENSG00000147113 -1.4122016
## 4  ENSG00000184368 -1.0724494
## 12 ENSG00000186871 -0.8856169
## 29 ENSG00000130508  0.3822835
## 21 ENSG00000149150 -0.3387733
## 23 ENSG00000185666  0.3002490
## 2  ENSG00000176769  0.2667967
## 30 ENSG00000142515  0.2633780
## 20 ENSG00000088053 -0.2163359
## 25 ENSG00000187608 -0.1967797
print(as.character(result_expr$GENE))
##  [1] "ENSG00000147113" "ENSG00000184368" "ENSG00000186871"
##  [4] "ENSG00000130508" "ENSG00000149150" "ENSG00000185666"
##  [7] "ENSG00000176769" "ENSG00000142515" "ENSG00000088053"
## [10] "ENSG00000187608" "ENSG00000162627" "ENSG00000240403"
## [13] "ENSG00000235268" "ENSG00000150457" "ENSG00000169330"
## [16] "ENSG00000001626" "ENSG00000213160" "ENSG00000140403"
## [19] "ENSG00000198822" "ENSG00000106025" "ENSG00000136205"
## [22] "ENSG00000083290" "ENSG00000167701" "ENSG00000204291"
## [25] "ENSG00000162551" "ENSG00000196739" "ENSG00000078114"
## [28] "ENSG00000172139" "ENSG00000049246" "ENSG00000067798"
## [31] "ENSG00000133134" "ENSG00000106633"
expr <- subset(expr, select = as.character(result_expr$GENE))
expr_test<-subset(expr_test,select=colnames(expr))

ens2gs<-read.delim("ENSEMBLE_TO_GENE_SYMBOL_COORD.txt",header=TRUE,sep="\t")
ens2gs<-ens2gs[match(colnames(expr),as.character(ens2gs$ensembl_gene_id)),]
colnames(expr)<-ens2gs$external_gene_name
colnames(expr_test)<-ens2gs$external_gene_name

We have also converted the Ensembl gene IDs into gene names on the fly for clarity. Next we proceed with LASSO feature selection for methylation data:

library("glmnet")
lasso_fit <- cv.glmnet(as.matrix(meth), Y, family = "binomial", alpha = 1)
plot(lasso_fit)

coef <- predict(lasso_fit, s = "lambda.min", type = "nonzero")
colnames(meth)[unlist(coef)]
##  [1] "cg10219816" "cg08715595" "cg03441844" "cg09557387" "cg17277199"
##  [6] "cg20408276" "cg11669516" "cg06193597" "cg24576535" "cg00335715"
## [11] "cg05495546" "cg06847624" "cg00642460" "cg18136963" "cg15013257"
## [16] "cg10409981" "cg11218091" "cg11216554" "cg08214808" "cg27373604"
## [21] "cg03126946" "cg02299497" "cg01517680" "cg02399249" "cg12744859"
## [26] "cg08065231" "cg22195627" "cg14815891"
result_meth <- data.frame(CPG = names(as.matrix(coef(lasso_fit, s = "lambda.min"))
                                      [as.matrix(coef(lasso_fit, s = "lambda.min"))[,1]!=0, 1])[-1], 
                          SCORE = as.numeric(as.matrix(coef(lasso_fit, s = "lambda.min"))
                                             [as.matrix(coef(lasso_fit, s = "lambda.min"))[,1]!=0, 1])[-1])
result_meth <- result_meth[order(-abs(result_meth$SCORE)),]
print(head(result_meth,10))
##           CPG       SCORE
## 10 cg00335715 -0.06256381
## 13 cg00642460 -0.05333048
## 19 cg08214808 -0.04765747
## 4  cg09557387 -0.04195541
## 11 cg05495546 -0.03922580
## 12 cg06847624 -0.03801790
## 27 cg22195627  0.03788744
## 2  cg08715595 -0.03788461
## 25 cg12744859 -0.03677530
## 28 cg14815891 -0.03604474
print(as.character(result_meth$CPG))
##  [1] "cg00335715" "cg00642460" "cg08214808" "cg09557387" "cg05495546"
##  [6] "cg06847624" "cg22195627" "cg08715595" "cg12744859" "cg14815891"
## [11] "cg20408276" "cg10409981" "cg03126946" "cg10219816" "cg18136963"
## [16] "cg11669516" "cg11218091" "cg15013257" "cg03441844" "cg17277199"
## [21] "cg01517680" "cg11216554" "cg06193597" "cg02299497" "cg02399249"
## [26] "cg27373604" "cg24576535" "cg08065231"
meth <- subset(meth, select = as.character(result_meth$CPG))
meth_test <- subset(meth_test, select = as.character(result_meth$CPG))

We can see that we dramatically decreased the number of dimensions / features in the gene expression and methylation data sets. Now we can proceed with OMICs integration via PLS-DA algorithm.

Integrative OMICs Analysis

Now we will start integrating the four OMICs: 1) gene expression, 2) methylation and 3) mutations and 4) drug response. For this purpose we will concatenate gene expression, methylation, mutation and drug response matrices into X matrix and use the Gender as Y variable, so it is a typical Machine Learning setup: y=f(x), where x is the input, y is the class labels of individuals and the f-function is learnt from the data. Note that the f-function is a-priori linear for PLS-DA and non-linear for e.g. artificial neural networks.

data<-list(expr=expr,mut=mut,meth=meth,drug=drug)
names(data)
## [1] "expr" "mut"  "meth" "drug"
lapply(data, dim) 
## $expr
## [1] 140  32
## 
## $mut
## [1] 140  10
## 
## $meth
## [1] 140  28
## 
## $drug
## [1] 140 310

First, we fit a DIABLO model without variable selection to assess the global performance and choose the number of components for the final DIABLO model. The function perf is run with nfold-fold cross validation repeated nrepeat times. The matrix design sets sexpected covariance between the OMICs. The values may range between 0 (no correlation) to 1 (correlation to maximize) and the design can be chosen based on prior knowledge. Here due to the lack of prior knowledge we select a strong correlation 1 between the OMICs.

library("mixOmics")
design=matrix(1,ncol=length(data),nrow=length(data),dimnames=list(names(data),names(data)))
diag(design)=0
design
##      expr mut meth drug
## expr    0   1    1    1
## mut     1   0    1    1
## meth    1   1    0    1
## drug    1   1    1    0
splsda.res = block.splsda(X = data, Y = Y, ncomp = 8, design = design)
## Design matrix has changed to include Y; each block will be linked to Y.
perf.diablo = perf(splsda.res, validation = 'Mfold', folds = 2, nrepeat = 5, progressBar=FALSE, cpus=4)
perf.diablo
## 
## Call:
##  perf.sgccda(object = splsda.res, validation = "Mfold", folds = 2, nrepeat = 5, cpus = 4, progressBar = FALSE) 
## 
##  Main numerical outputs: 
##  -------------------- 
##  Error rate (overall or BER) for each component and for each distance: see object$error.rate 
##  Error rate per class, for each component and for each distance: see object$error.rate.class 
##  Prediction values for each component: see object$predict 
##  Classification of each sample, for each component and for each distance: see object$class 
##  Stable features on each component: see object$features$stable 
##  AUC values: see object$auc if auc = TRUE 
## 
##  Visualisation Functions: 
##  -------------------- 
##  plot
plot(perf.diablo,overlay='dist',sd=TRUE)

The BER (balanced error rate, which is our case since we have more males than females) mahalanobis distance seems to reach its minimum / plateau at ncomp=2 and does not seem to change further, we will use this number as an optimal number of PLS components to keep in the further downstream analysis.

Let us perform tuning of the model which implies selecting most informative variables in all layers of the data by LASSO algorithm. For this purpose we will need to define a design matrix which shows a-priori correlation between phenotypes and gene expression. We assume strong correlation, but one can in principle play with this parameter and find out how it influences the final result. Further, we will concentrate on the first two principal components for simplicity, the more precise number of principal components to select can be investigated via minimizing the balanced error rate in the cross-validation procedure.

library("mixOmics")
test.keepX=list("expr"=c(1:5),"mut"=c(1:5),"meth"=c(1:5),"drug"=c(1:5))
ptm<-proc.time()
tune.omics=tune.block.splsda(X=data,Y=Y,ncomp=2,test.keepX=test.keepX,design=design,cpus=4,progressBar=FALSE,validation="Mfold",folds=2,nrepeat=5,near.zero.var=FALSE,dist = "mahalanobis.dist")
## You have provided a sequence of keepX of length: 5 for block expr and 5 for block mut and 5 for block meth and 5 for block drug.
## This results in 625 models being fitted for each component and each nrepeat, this may take some time to run, be patient!
my_time<-proc.time()-ptm
print(paste0("Thus it takes ",as.numeric(my_time["elapsed"])/3600,"h to run this piece of code"))
## [1] "Thus it takes 0.3180575h to run this piece of code"

LASSO selected the following numbers of most informative variables from the loading vectors for each OMICs data set for each of the two principal components:

tune.omics$choice.keepX
## $expr
## [1] 5 4
## 
## $mut
## [1] 1 1
## 
## $meth
## [1] 1 5
## 
## $drug
## [1] 1 5

Now let us perform the final sPLS-DA modelling and display PCA plots and loadings. Here for simplicity we will use all available mutations due to its special binary structure.

#list.keepX=list("expr"=tune.omics$choice.keepX$expr,"mut"=tune.omics$choice.keepX$mut,"meth"=tune.omics$choice.keepX$meth,"drug"=tune.omics$choice.keepX$drug)
list.keepX=list("expr"=tune.omics$choice.keepX$expr,"mut"=c(dim(mut)[2],dim(mut)[2]),"meth"=tune.omics$choice.keepX$meth,"drug"=tune.omics$choice.keepX$drug)
res=block.splsda(X=data,Y=Y,ncomp=2,keepX=list.keepX,design=design,near.zero.var=FALSE)
## Design matrix has changed to include Y; each block will be linked to Y.
selectVar(res,block='expr',comp=1)$expr$name
## [1] "CXorf36" "LATS2"   "MAP7D2"  "ERCC6L"  "ULK2"
plotIndiv(res,legend=TRUE,title="CLL Omics",ellipse=FALSE,ind.names=FALSE,cex=2)

Let us look at the loadings:

plotLoadings(res,comp=1,contrib='max',method='median')

plotLoadings(res,comp=2,contrib='max',method='median')

Now we will diplay each individual on the plot of each of the OMICs against each other which are defined by the top loadings from their respective prrincipal components.

plotDiablo(res,ncomp=1)

plotDiablo(res,ncomp=2)

Now let us display so-called “arrow plot” which demonstrates the samples (individuals) in a superimposed manner where each sample will be indicated using an arrow. The start of the arrow indicates the location of the sample in X in one plot, and the tip the location of the sample in Y in the other plot. Short arrows indicate a strong agreement between the matching data sets, long arrows a disagreement between the matching data sets. Such graphic highlight the agreement between all data sets at the sample level, when modelled with DIABLO.

plotArrow(res,ind.names=FALSE,legend=TRUE,title="CLL Omics Integration")

The Arrow Plot can be viewed as a Consensus Plot between all the OMICs, the X and Y coordiantes correspond to common latent variables between the OMICs, i.e. we projected the OMICs into some common latent space where they loose information about their technological origin and hence can be superimposed onto each other.

Now we will display the correlation circle plot, where top loadings variables from each of the OMICs are superimposed. Clustering of variables around the poles of the circle implies strong correlation between the variables from the OMICs data sets. Variables on the opposite poles of the correlation circle plot imply strong anti-correlation.

plotVar(res,var.names=TRUE,style='graphics',legend=TRUE,pch=c(16,17,18,19),cex=c(0.8,0.8,0.8,0.8),col=c('blue','red2',"darkgreen","darkorange"))

For further visualization of the results of integration, we will calculate the so-called “circos plot” that diaplays variable correlation among different OMICs dat sets. Please note that the variables for this plot were selected simultaneously from all the OMICs, i.e. they are not equavivalent to those obtained from each individual OMIC separately.

circosPlot(res,cutoff=0.7,line=FALSE,size.variables=0.5)

Correlation network is another way to demostrate correlations between top loadings of the OMICs data sets in a pairwise fashion.

network(res,blocks=c(1,2),cex.node.name=0.6,color.node=c('blue','red2'),breaks=NULL)

network(res,blocks=c(1,3),cex.node.name=0.6,color.node=c('blue','darkgreen'),breaks=NULL)

network(res,blocks=c(1,4),cex.node.name=0.6,color.node=c('blue','darkorange'),breaks=NULL)

network(res,blocks=c(2,3),cex.node.name=0.6,color.node=c('red2','darkgreen'),breaks=NULL)

network(res,blocks=c(2,4),cex.node.name=0.6,color.node=c('red2','darkorange'),breaks=NULL)

network(res,blocks=c(3,4),cex.node.name=0.6,color.node=c('darkgreen','darkorange'),breaks=NULL)

Finally the correlation heatmap displays strongly correlated blocks of gene expression, methylation and clinical variables markers that provide clustering of individuals into females and males.

cimDiablo(res,margins=c(11,18))

Predict Gender from OMICs Integration

Now it is time for prediction. Once we have trained the PLS-DA model, we can use it and utilize the 60 test samples for making prediction of their geneder and accessing the accuracy of the prediction:

data.test<-list(expr=expr_test,mut=mut_test,meth=meth_test,drug=drug_test)
lapply(data.test, dim) 
## $expr
## [1] 60 32
## 
## $mut
## [1] 60 10
## 
## $meth
## [1] 60 28
## 
## $drug
## [1]  60 310
predict.diablo=predict(res,newdata=data.test,dist='mahalanobis.dist')
auroc.diablo=auroc(res,newdata=data.test,outcome.test=Y.test,plot=TRUE,roc.comp=c(1),roc.block=c(1,2,3,4))

data.frame(predict.diablo$class,Truth=Y.test)
##      mahalanobis.dist.expr.comp.1 mahalanobis.dist.expr.comp.2
## H102                            f                            f
## H040                            f                            f
## H165                            f                            f
## H018                            f                            f
## H101                            f                            f
## H096                            f                            f
## H033                            f                            f
## H210                            m                            m
## H225                            f                            f
## H170                            m                            m
## H181                            m                            m
## H182                            m                            m
## H204                            m                            m
## H023                            f                            f
## H051                            f                            f
## H031                            f                            f
## H209                            m                            m
## H035                            f                            f
## H081                            f                            f
## H029                            f                            f
## H014                            f                            f
## H186                            m                            m
## H166                            m                            m
## H167                            f                            f
## H016                            m                            m
## H044                            m                            m
## H032                            m                            m
## H055                            m                            m
## H254                            m                            m
## H024                            m                            m
## H183                            m                            m
## H030                            m                            m
## H252                            m                            m
## H272                            m                            m
## H231                            m                            m
## H107                            m                            m
## H094                            m                            m
## H109                            m                            m
## H260                            f                            f
## H078                            m                            m
## H080                            m                            m
## H208                            m                            m
## H072                            m                            m
## H020                            m                            m
## H059                            m                            m
## H235                            m                            m
## H206                            m                            m
## H202                            m                            m
## H106                            m                            m
## H087                            m                            m
## H049                            m                            m
## H104                            m                            m
## H038                            m                            m
## H037                            m                            m
## H250                            m                            m
## H191                            m                            m
## H017                            m                            m
## H176                            m                            m
## H070                            m                            m
## H178                            m                            m
##      mahalanobis.dist.mut.comp.1 mahalanobis.dist.mut.comp.2
## H102                           f                           f
## H040                           m                           m
## H165                           m                           m
## H018                           m                           m
## H101                           m                           m
## H096                           m                           m
## H033                           m                           m
## H210                           m                           m
## H225                           m                           m
## H170                           m                           m
## H181                           m                           m
## H182                           m                           m
## H204                           m                           m
## H023                           f                           f
## H051                           f                           f
## H031                           m                           m
## H209                           m                           m
## H035                           m                           f
## H081                           m                           m
## H029                           f                           f
## H014                           f                           f
## H186                           m                           m
## H166                           m                           m
## H167                           m                           m
## H016                           m                           f
## H044                           f                           f
## H032                           m                           m
## H055                           m                           m
## H254                           m                           f
## H024                           m                           m
## H183                           f                           f
## H030                           f                           f
## H252                           f                           f
## H272                           f                           f
## H231                           f                           f
## H107                           f                           m
## H094                           m                           m
## H109                           f                           m
## H260                           f                           f
## H078                           f                           f
## H080                           f                           f
## H208                           m                           m
## H072                           f                           f
## H020                           m                           m
## H059                           m                           m
## H235                           m                           f
## H206                           m                           m
## H202                           m                           m
## H106                           m                           m
## H087                           m                           m
## H049                           m                           m
## H104                           f                           m
## H038                           m                           m
## H037                           m                           m
## H250                           m                           m
## H191                           m                           m
## H017                           f                           f
## H176                           m                           m
## H070                           m                           m
## H178                           f                           f
##      mahalanobis.dist.meth.comp.1 mahalanobis.dist.meth.comp.2
## H102                            f                            f
## H040                            m                            m
## H165                            f                            m
## H018                            m                            m
## H101                            f                            f
## H096                            m                            m
## H033                            m                            m
## H210                            m                            m
## H225                            m                            m
## H170                            m                            m
## H181                            m                            f
## H182                            m                            f
## H204                            m                            f
## H023                            f                            f
## H051                            m                            m
## H031                            f                            f
## H209                            f                            f
## H035                            f                            f
## H081                            m                            f
## H029                            f                            m
## H014                            m                            m
## H186                            m                            f
## H166                            f                            f
## H167                            f                            m
## H016                            m                            m
## H044                            m                            m
## H032                            m                            m
## H055                            m                            m
## H254                            m                            f
## H024                            m                            m
## H183                            m                            m
## H030                            m                            m
## H252                            f                            f
## H272                            f                            f
## H231                            f                            f
## H107                            m                            m
## H094                            m                            m
## H109                            m                            m
## H260                            f                            f
## H078                            f                            m
## H080                            m                            f
## H208                            m                            m
## H072                            f                            m
## H020                            m                            m
## H059                            f                            f
## H235                            m                            m
## H206                            f                            f
## H202                            f                            m
## H106                            f                            f
## H087                            m                            m
## H049                            f                            m
## H104                            f                            m
## H038                            m                            f
## H037                            m                            m
## H250                            m                            f
## H191                            f                            f
## H017                            m                            m
## H176                            m                            f
## H070                            f                            m
## H178                            m                            m
##      mahalanobis.dist.drug.comp.1 mahalanobis.dist.drug.comp.2 Truth
## H102                            f                            f     f
## H040                            m                            m     f
## H165                            f                            f     f
## H018                            m                            m     f
## H101                            m                            m     f
## H096                            m                            m     f
## H033                            m                            m     f
## H210                            m                            m     f
## H225                            m                            m     f
## H170                            m                            m     f
## H181                            f                            f     f
## H182                            m                            m     f
## H204                            m                            m     f
## H023                            f                            f     f
## H051                            f                            f     f
## H031                            f                            f     f
## H209                            f                            m     f
## H035                            m                            f     f
## H081                            m                            m     f
## H029                            f                            f     f
## H014                            f                            f     f
## H186                            m                            m     f
## H166                            m                            m     f
## H167                            f                            f     f
## H016                            m                            m     m
## H044                            f                            f     m
## H032                            m                            m     m
## H055                            m                            m     m
## H254                            m                            m     m
## H024                            m                            m     m
## H183                            f                            f     m
## H030                            f                            f     m
## H252                            f                            f     m
## H272                            f                            f     m
## H231                            f                            f     m
## H107                            f                            f     m
## H094                            m                            m     m
## H109                            f                            f     m
## H260                            f                            f     m
## H078                            f                            f     m
## H080                            f                            f     m
## H208                            f                            m     m
## H072                            m                            m     m
## H020                            m                            m     m
## H059                            m                            m     m
## H235                            m                            m     m
## H206                            m                            m     m
## H202                            m                            m     m
## H106                            m                            m     m
## H087                            m                            m     m
## H049                            m                            m     m
## H104                            m                            m     m
## H038                            f                            f     m
## H037                            m                            m     m
## H250                            m                            m     m
## H191                            f                            f     m
## H017                            f                            f     m
## H176                            m                            m     m
## H070                            m                            m     m
## H178                            f                            f     m
table(predict.diablo$MajorityVote$mahalanobis.dist[,1],Y.test)
##    Y.test
##      f  m
##   f  8  5
##   m 13 20
round((sum(diag(table(predict.diablo$MajorityVote$mahalanobis.dist[,1],Y.test)))/sum(table(predict.diablo$MajorityVote$mahalanobis.dist[,1],Y.test)))*100)
## [1] 61

Therefore the the success rate of the first predictive component is 61%, it is quite high and provides new candidate bio-markers (see the variable plot) for understanding of mechanisms CLL pathogenesis.

sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.3 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=sv_SE.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=sv_SE.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=sv_SE.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=sv_SE.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] glmnet_2.0-18      foreach_1.4.7      Matrix_1.2-17     
## [4] matrixStats_0.54.0 mixOmics_6.3.1     ggplot2_3.2.1     
## [7] lattice_0.20-38    MASS_7.3-51.1     
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_0.2.5        xfun_0.8               
##  [3] purrr_0.3.2             reshape2_1.4.3         
##  [5] colorspace_1.4-1        snow_0.4-3             
##  [7] miniUI_0.1.1.1          htmltools_0.3.6        
##  [9] yaml_2.2.0              rlang_0.4.0            
## [11] manipulateWidget_0.10.0 pillar_1.4.2           
## [13] later_0.8.0             glue_1.3.1             
## [15] withr_2.1.2             RColorBrewer_1.1-2     
## [17] plyr_1.8.4              stringr_1.4.0          
## [19] munsell_0.5.0           gtable_0.3.0           
## [21] htmlwidgets_1.3         codetools_0.2-16       
## [23] evaluate_0.14           labeling_0.3           
## [25] knitr_1.24              httpuv_1.5.1           
## [27] crosstalk_1.0.0         parallel_3.6.1         
## [29] rARPACK_0.11-0          Rcpp_1.0.2             
## [31] xtable_1.8-4            corpcor_1.6.9          
## [33] scales_1.0.0            promises_1.0.1         
## [35] webshot_0.5.1           jsonlite_1.6           
## [37] RSpectra_0.15-0         mime_0.7               
## [39] gridExtra_2.3           ellipse_0.4.1          
## [41] digest_0.6.20           stringi_1.4.3          
## [43] dplyr_0.8.3             shiny_1.3.2            
## [45] grid_3.6.1              tools_3.6.1            
## [47] magrittr_1.5            rgl_0.100.26           
## [49] lazyeval_0.2.2          tibble_2.1.3           
## [51] crayon_1.3.4            tidyr_0.8.3            
## [53] pkgconfig_2.0.2         iterators_1.0.12       
## [55] assertthat_0.2.1        rmarkdown_1.14         
## [57] R6_2.4.0                igraph_1.2.4.1         
## [59] compiler_3.6.1